001 /* 002 * CondorGPBSDispatcher.java 003 * 004 * Created on June 8, 2004, 11:17 AM 005 * 006 * This file is part of the STAR Scheduler. 007 * Copyright (c) 2002-2003 STAR Collaboration - Brookhaven National Laboratory 008 * 009 * STAR Scheduler is free software; you can redistribute it and/or modify 010 * it under the terms of the GNU General Public License as published by 011 * the Free Software Foundation; either version 2 of the License, or 012 * (at your option) any later version. 013 * 014 * STAR Scheduler is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * along with STAR Scheduler; if not, write to the Free Software 021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 022 */ 023 package gov.bnl.star.offline.scheduler.condorg; 024 025 import gov.bnl.star.offline.scheduler.*; 026 import gov.bnl.star.offline.scheduler.Dispatchers.lsf.CSHApplication; 027 import gov.bnl.star.offline.scheduler.Dispatchers.lsf.LSFDispatcher; 028 import gov.bnl.star.offline.scheduler.util.CSHCommandLineTask; 029 import gov.bnl.star.offline.scheduler.util.FilesystemToolkit; 030 import gov.bnl.star.offline.scheduler.util.StatisticsRecorder; 031 032 import java.io.File; 033 import java.io.FileOutputStream; 034 import java.io.PrintStream; 035 import java.util.*; 036 037 import java.util.logging.Level; 038 import java.util.logging.Logger; 039 040 041 /** Dispatches jobs using Condor-G on a remote site that uses PBS. 042 * It will NOT use extra rsl attributes for PBS. If needed they will 043 * be added later. 044 * @author Alex Withers 045 * @version 1.0 2004/06/08 046 */ 047 public class CondorGPBSDispatcher extends LSFDispatcher { 048 static private Logger log = Logger.getLogger(CondorGPBSDispatcher.class.getName()); 049 050 private static String condorEx; 051 protected CSHApplication application; 052 053 public void setCondorEx(String condorEx) { 054 this.condorEx = condorEx; 055 } 056 057 public String getCondorEx() { 058 return condorEx; 059 } 060 061 /** Creates a new dispatcher */ 062 public CondorGPBSDispatcher() { 063 } 064 065 /** Creates the scripts and dispatches the job on the target machine. 066 * @param request the job request 067 */ 068 public void dispatch(Request request, List jobs) { 069 log.info("Dispatching using Condor-g and LSF: \"" + request.getCommand() + 070 "\""); 071 072 // Enables the simulation mode if necessary 073 useSimulationMode(request.getSimulation()); 074 reportedFailure = false; 075 076 // Submits from the higher to the lower JobID. This way the 077 // user has a feel of when the last job is going to be 078 // submitted 079 for (int nProcess = jobs.size() - 1; nProcess >= 0; 080 nProcess--) { 081 Job job = (Job) jobs.get(nProcess); 082 083 System.out.print("Dispatching process " + 084 job.getJobID() + "."); 085 dispatch(request, job); 086 } 087 088 StatisticsRecorder.getIntance().recordStatistics(request, jobs); 089 } 090 091 protected void dispatch(Request request, Job job) { 092 application = (CSHApplication) ComponentLibrary.getInstance().getComponent("CSHApplication"); 093 094 // TODO: all the parameters should be passed in one go 095 application.setJob(request, job); 096 application.setScratchDir(scratchDir); 097 application.setSubmissionCommand(getCondorGCommand(request, job)); 098 099 application.prepareJob(); 100 prepareClassAd(request, job); 101 102 log.info("Executing \"" + getCondorGCommand(request, job) + "\""); 103 104 if (!simulation) { 105 try { 106 Thread.sleep(getMsBtwnSuccess()); 107 } catch (Exception e) { 108 } 109 110 int attempt = 0; 111 boolean success = false; 112 113 while (!success && (attempt < getMaxAttempts())) { 114 try { 115 CSHCommandLineTask task = new CSHCommandLineTask(getCondorGCommand( 116 request, job), true, 30000); 117 task.execute(); 118 119 if (task.getExitStatus() != 0) { 120 log.warning("bsub failed: " + task.getOutput()); 121 Thread.sleep(getMsBtwnFailure()); 122 System.out.print("/"); 123 attempt++; 124 } else { 125 success = true; 126 } 127 } catch (Exception e) { 128 log.log(Level.SEVERE, 129 "Couldn't submit the script to Condor-g", e); 130 131 try { 132 Thread.sleep(getMsBtwnFailure()); 133 } catch (Exception e1) { 134 } 135 136 System.out.print("/"); 137 attempt++; 138 } 139 } 140 141 if (success) { 142 System.out.println(" done."); 143 } else { 144 System.out.println(" FAILED!!"); 145 } 146 } else { 147 System.out.println(" simulated."); 148 } 149 } 150 151 /** Returns the command line to submit the job through condor-g. 152 * @param request the request that originated the job 153 * @param job the job to be dispatched 154 * @return the commandline to submit the job 155 */ 156 protected String getCondorGCommand(Request request, Job job) { 157 return condorEx + " " + getClassAdName(request, job); 158 } 159 160 /** Returns the name of the file containing the class ad. Class ad is the job 161 * description required by condor to submit a job. 162 * @param request the request that originated the job 163 * @param job the job to be submitted 164 * @return the file name of the class ad 165 */ 166 protected String getClassAdName(Request request, Job job) { 167 return "sched" + job.getJobID() + ".condorg"; 168 } 169 170 private void prepareClassAd(Request request, Job job) { 171 try { 172 PrintStream classAd = new PrintStream(new FileOutputStream( 173 new File(getClassAdName(request, job)))); 174 createClassAd(request, job, classAd); 175 } catch (Exception e) { 176 log.log(Level.SEVERE, "Couldn't create the class ad", e); 177 throw new RuntimeException("Couldn't create the class ad " + 178 getClassAdName(request, job) + ": " + e.getMessage()); 179 } 180 } 181 182 private void createClassAd(Request request, Job job, 183 PrintStream classAd) { 184 classAd.print("executable = "); 185 classAd.println(getExecutable()); 186 187 if (getArguments() != null) { 188 classAd.print("arguments = "); 189 classAd.println(getArguments()); 190 } 191 192 classAd.print("globusscheduler = "); 193 classAd.println(getGlobusScheduler()); 194 195 if (application.getStdin() != null) { 196 classAd.print("input = "); 197 classAd.println(application.getStdin()); 198 } 199 200 if (application.getStdout() != null) { 201 classAd.print("output = "); 202 classAd.println(application.getStdout()); 203 } 204 205 if (application.getStderr() != null) { 206 classAd.print("error = "); 207 classAd.println(application.getStderr()); 208 } 209 210 classAd.print("log = "); 211 classAd.println(getLogName(job)); 212 213 if (getRemoteDirectory() != null) { 214 classAd.print("remote_initialdir = "); 215 classAd.println(getRemoteDirectory()); 216 } 217 218 /* This is basically the main difference from 219 * CondorGLSFDispatcher.java. No globus-rsl stuff. 220 * -- Alex Withers 221 */ 222 /* 223 classAd.print("globusrsl ="); 224 225 if (job.getTarget() != null) { 226 classAd.print(" (xlsfmachine = "); 227 classAd.print(job.getTarget()); 228 classAd.print(")"); 229 } 230 231 if (application.getJobName() != null) { 232 classAd.print(" (xlsfjobname = "); 233 classAd.print(application.getJobName()); 234 classAd.print(")"); 235 } 236 237 if (request.getMail()) { 238 classAd.print(" (xlsfmailreport = "); 239 classAd.print("false"); 240 classAd.print(")"); 241 } else { 242 classAd.print(" (xlsfmailreport = "); 243 classAd.print("true"); 244 classAd.print(")"); 245 } 246 247 if (getResourceUsageSwitch(job) != null) { 248 classAd.print(" (xlsfresources = "); 249 classAd.print(getResourceUsageSwitch(job)); 250 classAd.print(")"); 251 } 252 253 if (job.getQueue() != null) { 254 classAd.print(" (queue = "); 255 classAd.print(job.getQueue()); 256 classAd.print(")"); 257 } 258 259 classAd.println(); 260 */ 261 262 if (isTransferExecutable()) { 263 classAd.println("transfer_executable = true"); 264 } else { 265 classAd.println("transfer_executable = false"); 266 } 267 classAd.println("notification = never"); 268 classAd.println("universe = globus"); 269 classAd.println("queue"); 270 } 271 272 private String getExecutable() { 273 if (application.getCommandLine().indexOf(' ') == -1) { 274 return application.getCommandLine(); 275 } 276 277 return application.getCommandLine().substring(0, 278 application.getCommandLine().indexOf(' ')); 279 } 280 281 private String getArguments() { 282 if (application.getCommandLine().indexOf(' ') == -1) { 283 return null; 284 } 285 286 return application.getCommandLine().substring(application.getCommandLine() 287 .indexOf(' ') + 288 1); 289 } 290 291 private String getLogName(Job job) { 292 // TODO maybe log filename should be put as a general property of Process (as stds) 293 return "sched" + job.getJobID() + ".condorg.log"; 294 } 295 296 private String getGlobusScheduler() { 297 //TODO make it flexible 298 return getGlobusGatekeeper(); 299 } 300 301 private String gatekeeper; 302 303 /** Holds value of property transferExecutable. */ 304 private boolean transferExecutable; 305 306 public void setGlobusGatekeeper(String gatekeeper) { 307 this.gatekeeper = gatekeeper; 308 } 309 310 public String getGlobusGatekeeper() { 311 return gatekeeper; 312 } 313 314 private String remoteInitialDir; 315 316 public void setRemoteInitialDir(String remoteInitialDir) { 317 this.remoteInitialDir = remoteInitialDir; 318 } 319 320 public String getRemoteInitialDir() { 321 return remoteInitialDir; 322 } 323 324 private String getRemoteDirectory() { 325 // TODO this has to be specified better: remote execution directory could be different from scheduler execution directory 326 if (".".equals(getRemoteInitialDir())) return FilesystemToolkit.getCurrentDirectory(); 327 return getRemoteInitialDir(); 328 } 329 330 protected String getResourceUsageSwitch(Job job) { 331 String res = super.getResourceUsageSwitch(job); 332 if (res == null) return res; 333 334 return res.replaceAll("\"", "\\\\\""); 335 } 336 337 /** Getter for property transferExecutable. 338 * @return Value of property transferExecutable. 339 * 340 */ 341 public boolean isTransferExecutable() { 342 return this.transferExecutable; 343 } 344 345 /** Setter for property transferExecutable. 346 * @param transferExecutable New value of property transferExecutable. 347 * 348 */ 349 public void setTransferExecutable(boolean transferExecutable) { 350 this.transferExecutable = transferExecutable; 351 } 352 353 }